import rasterio
import os
import numpy as np
import matplotlib.pyplot as plt
def normalise(x):
    """Scale array *x* into [0, 1] by dividing by its maximum value.

    Replaces the original ``normalise = lambda x: ...`` binding (PEP 8
    E731) with a proper ``def``; behaviour is unchanged.

    NOTE(review): an all-zero input still divides by zero (NaN/inf plus a
    numpy runtime warning), exactly as the original lambda did.
    """
    return x / np.max(x)
# Load the HyRANK scenes: Dioni / Loukia (training) and Erato / Kirki /
# Nefeli (test), each as a (bands, rows, cols) array.
def _load_cube(subset, name, normalize=True):
    """Read one HyRANK GeoTIFF and return it as a numpy array.

    subset: 'TrainingSet' or 'TestSet' directory name.
    name: file name of the .tif inside that directory.
    normalize: when True, scale each band to [0, 1] independently with
        the module-level `normalise` function; ground-truth rasters are
        loaded with normalize=False so their integer labels survive.

    The dataset handle is closed via the context manager — the original
    code called ``rasterio.open(...).read()`` and leaked every handle.
    """
    path = os.path.join(os.pardir, 'data', 'HyRANK_satellite', subset, name)
    with rasterio.open(path) as src:
        cube = src.read()
    if normalize:
        cube = np.array([normalise(band) for band in cube])
    return cube

dioni = _load_cube('TrainingSet', 'Dioni.tif')
print(dioni.shape)
dioni_gt = _load_cube('TrainingSet', 'Dioni_GT.tif', normalize=False)
print(dioni_gt.shape)
loukia = _load_cube('TrainingSet', 'Loukia.tif')
print(loukia.shape)
loukia_gt = _load_cube('TrainingSet', 'Loukia_GT.tif', normalize=False)
print(loukia_gt.shape)
erato = _load_cube('TestSet', 'Erato.tif')
print(erato.shape)
kirki = _load_cube('TestSet', 'Kirki.tif')
print(kirki.shape)
nefeli = _load_cube('TestSet', 'Nefeli.tif')
print(nefeli.shape)
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [15, 8]
# False-colour composite from bands 23/11/7 — presumably chosen to
# approximate an RGB view of the hyperspectral cube; TODO confirm which
# wavelengths these indices correspond to.
dioni_composite = dioni[[23, 11, 7], :, :]
# imshow expects (rows, cols, channels); the cubes are (bands, rows, cols).
plt.imshow(dioni_composite.transpose(1,2,0))
plt.title("Dioni")
plt.show()
# Ground-truth rasters carry a leading singleton band axis — drop it for display.
plt.imshow(dioni_gt.squeeze(0))
plt.title("Dioni ground truth")
plt.show()
loukia_composite = loukia[[23, 11, 7], :, :]
plt.imshow(loukia_composite.transpose(1,2,0))
plt.title("Loukia")
plt.show()
# Second Loukia view: lower half along axis 1 only. Note this overwrites
# loukia_composite, so the cropped version is what remains bound afterwards.
loukia_composite = loukia[:, loukia.shape[1]//2:, :][[23, 11, 7], :, :]
plt.imshow(loukia_composite.transpose(1,2,0))
plt.title("Loukia")
plt.show()
plt.imshow(loukia_gt.squeeze(0))
plt.title("Loukia ground truth")
plt.show()
# The three *_composite names below are reused later for the inference figures.
erato_composite = erato[[23, 11, 7], :, :]
plt.imshow(erato_composite.transpose(1,2,0))
plt.title("Erato")
plt.show()
kirki_composite = kirki[[23, 11, 7], :, :]
plt.imshow(kirki_composite.transpose(1,2,0))
plt.title("Kirki")
plt.show()
nefeli_composite = nefeli[[23, 11, 7], :, :]
plt.imshow(nefeli_composite.transpose(1,2,0))
plt.title("Nefeli")
plt.show()
import matplotlib.pyplot as plt
import json
import numpy as np
def hist(y, title):
    """Plot a horizontal bar chart of class frequencies in label vector *y*.

    y: 1-D non-negative integer array of class ids (0 = "Unknown").
    title: figure title; also prefixes the printed frequency vector.
    Returns the matplotlib.pyplot module, as the original did.
    """
    labs = ["Unknown", "Dense Urban Fabric", "Mineral Extraction Sites", "Non irrigated arable land", "Fruit trees",
            "Olive Groves", "Broad leaved forest", "Coniferous Forest", "Mixed forest",
            "Dense Sclerophyllous Vegetation", "Sparse Sclerophyllous Vegetation", "Sparsely Vegetated Areas",
            "Rocks and Sand", "Water", "Coastal Water"]
    fig, ax = plt.subplots()
    # BUG FIX: np.bincount(y) returns max(y)+1 bins, which can be shorter
    # than the fixed 15-entry label list; set_yticklabels then receives more
    # labels than ticks (rejected by recent matplotlib). minlength pads the
    # counts so there is always exactly one bar per label.
    freqs = np.bincount(y, minlength=len(labs))
    ticks = np.arange(len(freqs))
    print(title, freqs)
    ax.barh(ticks, freqs)
    ax.set_yticks(ticks)
    ax.set_yticklabels(labs)
    ax.set_title(title)
    plt.show()
    return plt
# Class histograms: full images first, then only the labeled (non-zero) pixels.
for pixels, caption in (
    (loukia_gt.reshape(-1), "Loukia pixel distribution"),
    (dioni_gt.reshape(-1), "Dioni pixel distribution"),
    (dioni_gt[dioni_gt != 0].reshape(-1), "Dioni labeled pixel distribution"),
    (loukia_gt[loukia_gt != 0].reshape(-1), "Loukia labeled pixel distribution"),
):
    hist(pixels, caption)
The split here is set in the same manner as in the neural network section so as to have a common baseline. It is essential to set aside exclusive subsets of each image to function as banks of train / test / validation pixels, so that pixels from the same labeled patches do not appear in both the train and test sets: because nearby pixels are very similar to each other, mixing them would amount to "cheating". Under the earlier approach, an accuracy of 95% on the test set was achieved when the SVM was tuned on random cross-validation splits of shuffled pixels from both images. However, this was not considered a meaningful result, since such a split made the problem much easier and did not reflect the real difficulty of the dataset.
import os
import numpy as np
# Spatial train/test split along axis 1 (image rows), computed once per scene.
dioni_cut = dioni.shape[1] // 8
loukia_train_cut = loukia.shape[1] // 4
loukia_test_cut = loukia.shape[1] // 8
train = [dioni[:, dioni_cut:, :], loukia[:, loukia_train_cut:, :]]
test = [dioni[:, :dioni_cut, :], loukia[:, :loukia_test_cut, :]]
dioni_gt_cut = dioni_gt.shape[1] // 8
loukia_gt_train_cut = loukia_gt.shape[1] // 4
loukia_gt_test_cut = loukia_gt.shape[1] // 8
train_gt = [dioni_gt[:, dioni_gt_cut:, :], loukia_gt[:, loukia_gt_train_cut:, :]]
test_gt = [dioni_gt[:, :dioni_gt_cut, :], loukia_gt[:, :loukia_gt_test_cut, :]]
# NOTE(review): for Loukia, train keeps rows from 1/4 onward while test takes
# only the first 1/8, leaving a 1/8-wide unused buffer strip — presumably a
# deliberate spatial separation between the sets; confirm with the author.
# Visual sanity check of the spatial split: for each train/test sub-image,
# show the band-23/11/7 composite followed by its ground truth, in the same
# order as the original eight plot stanzas.
for subset, subset_gt, tag in ((train, train_gt, "Train"), (test, test_gt, "Test")):
    for k in range(2):
        # The name dioni_composite is kept so the module-level binding after
        # this block matches the original code's final assignment.
        dioni_composite = subset[k][[23, 11, 7], :, :]
        plt.imshow(dioni_composite.transpose(1, 2, 0))
        plt.title("%s %d" % (tag, k + 1))
        plt.show()
        dioni_composite = subset_gt[k].squeeze(0)
        plt.imshow(dioni_composite)
        plt.title("%s %d GT" % (tag, k + 1))
        plt.show()
## Old split
# X1 = dioni.transpose(1, 2, 0)
# print("Initial shape: ", X1.shape)
# X1 = X1.reshape(-1, X1.shape[2])
# # X
# print("X")
# X1 = dioni.transpose(1, 2, 0)
# print("Initial shape: ", X1.shape)
# X1 = X1.reshape(-1, X1.shape[2])
# print("Final shape: ", X1.shape)
# X2 = loukia.transpose(1, 2, 0)
# print("Initial shape: ", X2.shape)
# X2 = X2.reshape(-1, X2.shape[2])
# print("Final shape: ", X2.shape)
# X = np.concatenate([X1, X2])
# print("Final X shape: ", X.shape)
# # y
# print("Y")
# print("Initial shape: ", dioni_gt.shape)
# y1 = dioni_gt.reshape(-1)
# print("Final shape: ", y1.shape)
# print("Initial shape: ", loukia_gt.shape)
# y2 = loukia_gt.reshape(-1)
# print("Final shape: ", y2.shape)
# y = np.concatenate([y1, y2])
# print("Final y shape: ", y.shape)
Train1 and Train2 image pixels are concatenated into a single pixel-based training dataset along with their corresponding labels; likewise, Test1 and Test2 image pixels are concatenated into a single pixel-based testing dataset with their labels. Creating a separate validation set is omitted here because a grid-search approach is used to tune the SVM, and grid search automatically applies train / validation splitting to the pixel-based training set.
# Flatten each split from (bands, rows, cols) cubes to (pixels, bands)
# matrices, and the ground-truth rasters to 1-D label vectors.
# BUG FIX: the original reshaped the Loukia arrays with the band count of the
# *Dioni* arrays (train[0].shape[0], test_gt[0].shape[0], ...). That is
# equivalent only while both scenes happen to share the same band count —
# TODO confirm; each array now uses its own shape.
dioni_train_pixels = train[0].transpose(1, 2, 0).reshape(-1, train[0].shape[0])
loukia_train_pixels = train[1].transpose(1, 2, 0).reshape(-1, train[1].shape[0])
X_train = np.concatenate([dioni_train_pixels, loukia_train_pixels])
dioni_train_pixels_gt = train_gt[0].transpose(1, 2, 0).reshape(-1, train_gt[0].shape[0])
loukia_train_pixels_gt = train_gt[1].transpose(1, 2, 0).reshape(-1, train_gt[1].shape[0])
y_train = np.concatenate([dioni_train_pixels_gt, loukia_train_pixels_gt]).reshape(-1)
dioni_test_pixels = test[0].transpose(1, 2, 0).reshape(-1, test[0].shape[0])
loukia_test_pixels = test[1].transpose(1, 2, 0).reshape(-1, test[1].shape[0])
X_test = np.concatenate([dioni_test_pixels, loukia_test_pixels])
dioni_test_pixels_gt = test_gt[0].transpose(1, 2, 0).reshape(-1, test_gt[0].shape[0])
loukia_test_pixels_gt = test_gt[1].transpose(1, 2, 0).reshape(-1, test_gt[1].shape[0])
y_test = np.concatenate([dioni_test_pixels_gt, loukia_test_pixels_gt]).reshape(-1)
print(X_test.shape, y_test.shape)
Non-labeled pixels are removed from both the training and the test sets.
# Unlabeled pixels (class 0) carry no supervision signal: drop them and
# shift the remaining labels down by one so classes start at 0.
keep_train = y_train != 0
X_train = X_train[keep_train]
y_train = y_train[keep_train] - 1
print("Final shapes: ", X_train.shape, y_train.shape)
print("Classes: ", np.unique(y_train))
keep_test = y_test != 0
X_test = X_test[keep_test]
y_test = y_test[keep_test] - 1
print("Final shapes: ", X_test.shape, y_test.shape)
print("Classes: ", np.unique(y_test))
# from sklearn.model_selection import train_test_split
# # Split to training and test set
# X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=69, stratify=y)
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn import metrics
from sklearn import preprocessing
import pandas as pd
from sklearn.model_selection import StratifiedKFold
Grid search is applied. The validation set is merged with the training set because the algorithm uses cross-validation, which creates its own inner train / validation splits.
# Tune each model via cross-validated grid search, then score the best
# estimator of each family on the held-out test pixels.
models = {'SVM': SVC()}
param_grid = [{'kernel': ['rbf'], 'C': [1, 100, 1000]}]
best_scores = []
params = []
for (name, estimator), grid in zip(models.items(), param_grid):
    print("\n\nTuning hyper-parameters, based on accuracy for {} with parameter grid:\n {}\n"
          .format(name, grid))
    clf = GridSearchCV(estimator, grid, cv=StratifiedKFold(5), scoring='accuracy', n_jobs=-1)
    clf.fit(X_train, y_train)
    print("Mean performance of each parameter combination based on Cross Validation")
    performance = pd.DataFrame(clf.cv_results_['params'])
    performance["Score"] = clf.cv_results_['mean_test_score']
    print(performance)
    print("\nBest parameters set found on training set:")
    print(clf.best_params_)
    params.append(clf.best_params_)
    print("\nThe scores are computed on the full evaluation set:")
    # Accuracy of the refit best estimator on the untouched test pixels.
    score = clf.score(X_test, y_test)
    print("Accuracy:", score)
    best_scores.append(score)
final_scores = dict(zip(models.keys(), best_scores))
print("\n\nThe best accuracies achieved by the algorithms are:", final_scores)
# clf = SVC(kernel="rbf", C=1e4)
# clf.fit(X_train, y_train)
# score = clf.score(X_val, y_val)
# print("Accuracy on validation set:", score)
# Refit the chosen configuration (rbf kernel, C=100) on the union of the
# train and test pixels before producing the inference maps below.
clf = SVC(kernel="rbf", C=100)
X = np.concatenate([X_train, X_test])
y = np.concatenate([y_train, y_test])
clf.fit(X, y)
# The bound method replaces the original no-op wrapper
# ``classify = lambda x: clf.predict(x)`` (PEP 8 E731); clf is not
# rebound afterwards, so early binding is safe here.
classify = clf.predict
Erato
# Erato: predict a class for every pixel, then show the composite and the map.
# Locals renamed: for a (bands, rows, cols) cube, axis 1 is rows and axis 2
# is columns — the original called them width/height, swapped.
rows = erato.shape[1]
cols = erato.shape[2]
erato_inference = classify(erato.transpose(1, 2, 0).reshape((rows * cols, -1))).reshape((rows, cols))
plt.imshow(erato_composite.transpose(1, 2, 0))
plt.title("Erato")
plt.show()
plt.imshow(erato_inference)
# BUG FIX: the original had a bare ``plt.legend`` (attribute access without
# a call) — a silent no-op, removed; also "infered" -> "inferred".
plt.title("Erato inferred classes")
plt.show()
Nefeli
# Nefeli: per-pixel prediction and display, as for Erato.
# Axis 1 is rows and axis 2 is columns (the original width/height were swapped).
rows = nefeli.shape[1]
cols = nefeli.shape[2]
nefeli_inference = classify(nefeli.transpose(1, 2, 0).reshape((rows * cols, -1))).reshape((rows, cols))
plt.imshow(nefeli_composite.transpose(1, 2, 0))
# Titles capitalised for consistency with the other scenes; "infered" typo fixed.
plt.title("Nefeli")
plt.show()
plt.imshow(nefeli_inference)
plt.title("Nefeli inferred classes")
plt.show()
Kirki
# Kirki: per-pixel prediction and display, as for Erato.
# Axis 1 is rows and axis 2 is columns (the original width/height were swapped).
rows = kirki.shape[1]
cols = kirki.shape[2]
kirki_inference = classify(kirki.transpose(1, 2, 0).reshape((rows * cols, -1))).reshape((rows, cols))
plt.imshow(kirki_composite.transpose(1, 2, 0))
# Titles capitalised for consistency with the other scenes; "infered" typo fixed.
plt.title("Kirki")
plt.show()
plt.imshow(kirki_inference)
plt.title("Kirki inferred classes")
plt.show()